/** * Copyright (c) 2010, 2013 Darmstadt University of Technology. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Marcel Bruch - initial API and implementation. * Olav Lenz - introduce importSnippet() */ package org.eclipse.recommenders.snipmatch; import static java.util.Collections.emptySet; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.lucene.queryParser.QueryParser.Operator.AND; import static org.eclipse.recommenders.snipmatch.Location.*; import static org.eclipse.recommenders.utils.Constants.DOT_JSON; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.nio.channels.OverlappingFileLockException; import java.text.MessageFormat; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.UUID; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.SuffixFileFilter; import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.eclipse.recommenders.coordinates.ProjectCoordinate; import org.eclipse.recommenders.internal.snipmatch.Filenames; import org.eclipse.recommenders.internal.snipmatch.MultiFieldPrefixQueryParser; import org.eclipse.recommenders.utils.IOUtils; import org.eclipse.recommenders.utils.Recommendation; import org.eclipse.recommenders.utils.Uris; import org.eclipse.recommenders.utils.gson.GsonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; public class FileSnippetRepository implements ISnippetRepository { public static final String NO_FILENAME_RESTRICTION = "*no filename restriction*"; private static final int MAX_SEARCH_RESULTS = 100; private static final int CACHE_SIZE = 200; private static final Set<String> EMPTY_STOPWORDS = emptySet(); private static final String F_NAME = "name"; private static final String F_DESCRIPTION = "description"; private static final String F_EXTRA_SEARCH_TERM = "extra"; private static final String F_TAG = "tag"; private static final String F_PATH = "path"; private static final String F_UUID = "uuid"; private static final String F_LOCATION = "location"; private static final String F_DEPENDENCY = "dependency"; private static final String F_FILENAME_RESTRICTION = "filenameRestriction"; private static final float NAME_BOOST = 4.0f; private static final float DESCRIPTION_BOOST = 2.0f; private static final float EXTRA_SEARCH_TERM_BOOST = DESCRIPTION_BOOST; private static final float TAG_BOOST = 1.0f; private static final float DEPENDENCY_BOOST = 1.0f; private static final float NO_RESTRICTION_BOOST = 0.5f; private Logger log = LoggerFactory.getLogger(getClass()); private volatile int timesOpened = 0; private final Lock readLock; private final Lock writeLock; private final String id; private final File snippetsdir; private final File indexdir; private final String repoUrl; private Directory directory; private IndexReader reader; private final Analyzer analyzer; private final QueryParser parser; private final Similarity similarity; private final LoadingCache<File, Snippet> snippetCache = CacheBuilder.newBuilder().maximumSize(CACHE_SIZE) .build(new CacheLoader<File, Snippet>() { @Override public Snippet load(File file) throws Exception { Snippet snippet = GsonUtil.deserialize(file, Snippet.class); return snippet; } }); public FileSnippetRepository(String id, File basedir) { Preconditions.checkArgument(basedir.isAbsolute()); Preconditions.checkArgument(CACHE_SIZE > MAX_SEARCH_RESULTS, "The cache size needs to be larger than the maximum number of search results."); this.id = id; snippetsdir = new File(basedir, "snippets"); indexdir = new File(basedir, "index"); repoUrl = Uris.mangle(basedir.toURI()); analyzer = createAnalyzer(); parser = createParser(); similarity = new IgnoreDocFrequencySimilarity(); ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); readLock = readWriteLock.readLock(); writeLock = readWriteLock.writeLock(); } private Analyzer createAnalyzer() { StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_35, EMPTY_STOPWORDS); Map<String, Analyzer> analyzers = Maps.newHashMap(); analyzers.put(F_NAME, standardAnalyzer); analyzers.put(F_DESCRIPTION, standardAnalyzer); analyzers.put(F_EXTRA_SEARCH_TERM, standardAnalyzer); analyzers.put(F_TAG, standardAnalyzer); analyzers.put(F_UUID, new KeywordAnalyzer()); analyzers.put(F_DEPENDENCY, standardAnalyzer); return new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), analyzers); } private QueryParser createParser() { String[] searchFields = new String[] { F_NAME, F_DESCRIPTION, F_EXTRA_SEARCH_TERM, F_TAG, F_DEPENDENCY }; Map<String, Float> boosts = ImmutableMap.of(F_NAME, NAME_BOOST, F_DESCRIPTION, DESCRIPTION_BOOST, F_EXTRA_SEARCH_TERM, EXTRA_SEARCH_TERM_BOOST, F_TAG, TAG_BOOST, F_DEPENDENCY, DEPENDENCY_BOOST); QueryParser parser = new MultiFieldPrefixQueryParser(Version.LUCENE_35, searchFields, analyzer, boosts, F_NAME, F_DESCRIPTION, F_EXTRA_SEARCH_TERM, F_DEPENDENCY); parser.setDefaultOperator(AND); return parser; } @Override public void open() throws IOException { writeLock.lock(); try { timesOpened++; if (timesOpened > 1) { return; } snippetsdir.mkdirs(); indexdir.mkdirs(); directory = FSDirectory.open(indexdir); index(); reader = IndexReader.open(directory); } finally { writeLock.unlock(); } } public void index() throws IOException { writeLock.lock(); try { File[] snippetFiles = snippetsdir.listFiles((FileFilter) new SuffixFileFilter(DOT_JSON)); doIndex(snippetFiles); } catch (OverlappingFileLockException e) { throw new IOException(MessageFormat.format( "Failure while creating index at \u2018{0}\u2019. Repository was opened {1} times.", indexdir, timesOpened), e); } finally { writeLock.unlock(); } } private void doIndex(File[] snippetFiles) throws IOException { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, config); try { snippetCache.invalidateAll(); for (File snippetFile : snippetFiles) { try { ISnippet snippet = snippetCache.get(snippetFile); String path = snippetFile.getPath(); indexSnippet(writer, snippet, path); } catch (Exception e) { log.error("Failed to index snippet in " + snippetFile, e); } } } finally { writer.close(); } if (reader != null) { reader = IndexReader.openIfChanged(reader); } } private void indexSnippet(IndexWriter writer, ISnippet snippet, String path) throws IOException { Document doc = new Document(); doc.add(new Field(F_PATH, path, Store.YES, Index.NO)); doc.add(new Field(F_UUID, snippet.getUuid().toString(), Store.NO, Index.NOT_ANALYZED)); String name = snippet.getName(); doc.add(new Field(F_NAME, name, Store.YES, Index.ANALYZED)); String description = snippet.getDescription(); doc.add(new Field(F_DESCRIPTION, description, Store.YES, Index.ANALYZED)); for (String tag : snippet.getTags()) { doc.add(new Field(F_TAG, tag, Store.YES, Index.ANALYZED_NO_NORMS)); } for (String extraSearchTerm : snippet.getExtraSearchTerms()) { doc.add(new Field(F_EXTRA_SEARCH_TERM, extraSearchTerm, Store.YES, Index.ANALYZED)); } for (Location location : expandLocation(snippet.getLocation())) { Field field = new Field(F_LOCATION, getIndexString(location), Store.NO, Index.NOT_ANALYZED); field.setBoost(0); doc.add(field); } for (ProjectCoordinate dependency : snippet.getNeededDependencies()) { doc.add(new Field(F_DEPENDENCY, getDependencyString(dependency), Store.YES, Index.ANALYZED)); } if (snippet.getLocation() == Location.FILE) { if (snippet.getFilenameRestrictions().isEmpty()) { doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED)); } for (String restriction : snippet.getFilenameRestrictions()) { doc.add(new Field(F_FILENAME_RESTRICTION, restriction.toLowerCase(), Store.NO, Index.NOT_ANALYZED)); } } else { doc.add(new Field(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION, Store.NO, Index.NOT_ANALYZED)); } writer.addDocument(doc); } private String getDependencyString(ProjectCoordinate pc) { return pc.getGroupId() + ":" + pc.getArtifactId(); } private String getIndexString(Location location) { return location.name().toLowerCase().replace('_', '-'); } @VisibleForTesting public boolean isOpen() { return timesOpened > 0; } private ISnippet getSnippet(File snippetFile) { try { return snippetCache.get(snippetFile); } catch (Exception e) { log.error("Error while loading snippet from file {}", snippetFile.getAbsolutePath(), e); return null; } } @Override public List<Recommendation<ISnippet>> search(ISearchContext context) { return doSearch(context, Integer.MAX_VALUE); } @Override public List<Recommendation<ISnippet>> search(ISearchContext context, int maxResults) { if (isBlank(context.getSearchText())) { return Collections.emptyList(); } return doSearch(context, Math.min(maxResults, MAX_SEARCH_RESULTS)); } private List<Recommendation<ISnippet>> doSearch(ISearchContext context, int maxResults) { readLock.lock(); try { Preconditions.checkState(isOpen()); List<Recommendation<ISnippet>> results = Lists.newLinkedList(); try { Map<File, Float> snippetFiles = searchSnippetFiles(context, maxResults); for (Entry<File, Float> entry : snippetFiles.entrySet()) { ISnippet snippet = snippetCache.get(entry.getKey()); results.add(Recommendation.newRecommendation(snippet, entry.getValue())); } } catch (Exception e) { log.error("Exception occurred while searching the snippet index.", e); } return results; } finally { readLock.unlock(); } } private Map<File, Float> searchSnippetFiles(ISearchContext context, int maxResults) { Map<File, Float> results = Maps.newLinkedHashMap(); IndexSearcher searcher = null; try { BooleanQuery query = new BooleanQuery(); if (StringUtils.isBlank(context.getSearchText())) { query.add(new MatchAllDocsQuery(), Occur.MUST); } else { query.add(parser.parse(context.getSearchText()), Occur.MUST); } if (context.getLocation() != NONE) { query.add(new TermQuery(new Term(F_LOCATION, getIndexString(context.getLocation()))), Occur.MUST); } String filename = context.getFilename(); if (filename != null) { BooleanQuery filenameRestrictionsQuery = new BooleanQuery(); TermQuery noRestrictionQuery = new TermQuery(new Term(F_FILENAME_RESTRICTION, NO_FILENAME_RESTRICTION)); noRestrictionQuery.setBoost(NO_RESTRICTION_BOOST); filenameRestrictionsQuery.add(noRestrictionQuery, Occur.SHOULD); int i = 1; for (String restriction : Filenames.getFilenameRestrictions(filename)) { TermQuery restrictionQuery = new TermQuery( new Term(F_FILENAME_RESTRICTION, restriction.toLowerCase())); float boost = (float) (0.5f + Math.pow(0.5, i)); restrictionQuery.setBoost(boost); filenameRestrictionsQuery.add(restrictionQuery, Occur.SHOULD); i++; } query.add(filenameRestrictionsQuery, Occur.MUST); } searcher = new IndexSearcher(reader); searcher.setSimilarity(similarity); float maxScore = 0; for (ScoreDoc hit : searcher.search(query, null, maxResults).scoreDocs) { Document doc = searcher.doc(hit.doc); if (!snippetApplicable(doc, context)) { continue; } results.put(new File(doc.get(F_PATH)), hit.score); if (hit.score > maxScore) { maxScore = hit.score; } } return normalizeValues(results, maxScore); } catch (ParseException e) { // While typing, a user can easily create unparsable queries // (temporarily) log.info("Failed to parse query", e); } catch (Exception e) { log.error("Exception occurred while searching the snippet index.", e); } finally { IOUtils.closeQuietly(searcher); } return results; } private boolean snippetApplicable(Document doc, ISearchContext context) { if (!context.isRestrictedByDependencies()) { return true; } String[] snippetDependencies = doc.getValues(F_DEPENDENCY); for (String snippetDependency : snippetDependencies) { boolean applicable = false; for (ProjectCoordinate workspaceDependency : context.getDependencies()) { if (applicable(workspaceDependency, snippetDependency)) { applicable = true; break; } } if (!applicable) { return false; } } return true; } private boolean applicable(ProjectCoordinate pc, String dependency) { return getDependencyString(pc).equals(dependency); } private Collection<Location> expandLocation(Location location) { switch (location) { case JAVA_STATEMENTS: return ImmutableSet.of(JAVA_STATEMENTS); case JAVA_TYPE_MEMBERS: return ImmutableSet.of(JAVA_TYPE_MEMBERS); case JAVADOC: return ImmutableSet.of(JAVADOC); case JAVA: return ImmutableSet.of(JAVA, JAVA_STATEMENTS, JAVA_TYPE_MEMBERS); case JAVA_FILE: return ImmutableSet.of(JAVA_FILE, JAVADOC, JAVA, JAVA_STATEMENTS, JAVA_TYPE_MEMBERS); case FILE: return ImmutableSet.of(FILE, JAVA_FILE, JAVADOC, JAVA, JAVA_STATEMENTS, JAVA_TYPE_MEMBERS); case NONE: default: throw new IllegalArgumentException(location.toString()); } } private Map<File, Float> normalizeValues(Map<File, Float> results, final float maxScore) { return Maps.transformValues(results, new Function<Float, Float>() { @Override public Float apply(Float input) { return maxScore == 0.0f ? 1.0f : input / maxScore; } }); } @Override public boolean hasSnippet(UUID uuid) { readLock.lock(); try { Preconditions.checkState(isOpen()); return !searchSnippetFiles(new SearchContext(F_UUID + ":" + uuid), Integer.MAX_VALUE).isEmpty(); } finally { readLock.unlock(); } } @Override public boolean delete(UUID uuid) throws IOException { writeLock.lock(); try { Preconditions.checkState(isOpen()); Map<File, Float> snippetFiles = searchSnippetFiles(new SearchContext(F_UUID + ":" + uuid), Integer.MAX_VALUE); if (snippetFiles.isEmpty()) { return false; } Iterables.getOnlyElement(snippetFiles.keySet()).delete(); index(); return true; } finally { writeLock.unlock(); } } @Override public boolean isDeleteSupported() { return true; } @Override public String getId() { return id; } @Override public String getRepositoryLocation() { return repoUrl; } @Override public void close() { writeLock.lock(); try { if (timesOpened == 0) { return; } else if (timesOpened > 1) { timesOpened--; return; } else if (timesOpened == 1) { timesOpened = 0; IOUtils.closeQuietly(reader); IOUtils.closeQuietly(directory); reader = null; } } finally { writeLock.unlock(); } } @Override public void importSnippet(ISnippet snippet) throws IOException { writeLock.lock(); try { Preconditions.checkState(isOpen()); Snippet importSnippet = checkTypeAndConvertSnippet(snippet); File file; Map<File, Float> snippetFiles = searchSnippetFiles( new SearchContext(F_UUID + ":" + importSnippet.getUuid()), Integer.MAX_VALUE); if (snippetFiles.isEmpty()) { file = new File(snippetsdir, importSnippet.getUuid() + DOT_JSON); } else { file = Iterables.getOnlyElement(snippetFiles.keySet()); } GsonUtil.serialize(importSnippet, file); index(); } finally { writeLock.unlock(); } } @Override public boolean isImportSupported() { return true; } private Snippet checkTypeAndConvertSnippet(ISnippet snippet) { if (snippet instanceof Snippet) { return (Snippet) snippet; } else { return Snippet.copy(snippet); } } private static class IgnoreDocFrequencySimilarity extends DefaultSimilarity { private static final long serialVersionUID = 6048878092975074153L; @Override public float tf(float freq) { return 1.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } } @Override public boolean delete() { writeLock.lock(); try { close(); try { FileUtils.deleteDirectory(snippetsdir); FileUtils.deleteDirectory(indexdir); return true; } catch (IOException e) { return false; } } finally { writeLock.unlock(); } } @Override public boolean share(Collection<UUID> uuids) { return false; } @Override public boolean isSharingSupported() { return false; } public ISnippet getSnippet(UUID uuid) { File snippetFile = getSnippetFile(uuid); if (snippetFile == null) { return null; } return getSnippet(snippetFile); } public File getSnippetFile(UUID uuid) { readLock.lock(); try { File file = new File(snippetsdir, uuid.toString() + DOT_JSON); return file.exists() ? file : null; } finally { readLock.unlock(); } } }